unit ApproxAlgorithms;

interface

uses
  Windows, math, Common,
     dialogs, sysutils, graphics, GraphUtils;

type

  ApproxSearchAlgorithms = (dp, nfa, countDP, CountNFA);




const ApproxAlgorithmNames  : Array[ApproxSearchalgorithms] of AnsiString
      = (
           'Dynamische Programmierung',
           'Simulierung eines NFAs',
           'Filter (Zeichen zhlen, DP)',
           'Filter (Zeichen zhlen, NFA)'
        );

      ApproxColors: Array[ApproxSearchAlgorithms] of TColor
       = (clBlue, clMaroon , clSkyBlue, clRed );

      EM_ABSOLUTE = 0;
      EM_RELATIVE = 1;

type

  TApproxAlgTimeArray = Array[ApproxSearchAlgorithms] of Int64;
  TApproxTrefferArray = Array[ApproxSearchAlgorithms] of TApproxTrefferList;
  TApproxTimesArray = Array of TApproxAlgTimeArray;

  TApproxStatistics = Array of Array of TApproxAlgTimeArray;

function min3(a,b,c: Integer): Integer;

function OutputApproxTrefferList(TrefferList: TApproxTrefferList): String;

function SearchDP(t, p: AnsiString; eq: Integer; TrefferList: TApproxTrefferList; ErrorMode: Byte ): Integer;

function PreProcess_NFA(p: AnsiString): TBC_CardinalArray;
function SearchNFA(t, p: AnsiString; eq: Integer; TrefferList: TApproxTrefferList; ErrorMode: Byte ): Integer;

function PreProcess_FilterCount(p: AnsiString): TBC_IntArray;
function SearchFilterCountDP(t, p: AnsiString; eq: Integer; TrefferList: TApproxTrefferList; ErrorMode: Byte ): Integer;
function SearchFilterCountNFA(t, p: AnsiString; eq: Integer; TrefferList: TApproxTrefferList; ErrorMode: Byte ): Integer;




function ApproxSearch(t, p: AnsiString; eq: Integer; alg: ApproxSearchAlgorithms; TrefferList: TApproxTrefferList; ErrorMode: Byte ): Boolean;

implementation


function min3(a,b,c: Integer): Integer;
begin
  result := a;
  if b < result then result := b;
  if c < result then result := c;
end;

function OutputApproxTrefferList(TrefferList: TApproxTrefferList): String;
var i: Integer;
begin
  result := '';
  if assigned(Trefferlist) then
  begin
      for i := 0 to TrefferList.Count - 1 do
      begin
          result := result +
          Format('%d (%d), ',
          [
          TApproxTreffer(TrefferList[i]).Index,
          TApproxTreffer(TrefferList[i]).Error
          ]);
      end;
  end;
end;

function SearchDP(t, p: AnsiString; eq: Integer; TrefferList: TApproxTrefferList; ErrorMode: Byte ): Integer;
var m, n, k, lact, i, IdxT, pC, nC: Integer;
    C: Array of Integer;
begin
  n := length(t);
  m := length(p);
  // max. erlaubte Fehlerzahl
  case ErrorMode of
    EM_ABSOLUTE: k := eq;
    EM_RELATIVE: k := (eq * m) div 100
  else
    k := 0;
  end;

  // Initialisierung
  setlength(C, m+1);
  for i := 0 to m do C[i] := i;
  lact := k+1;
  result := 0;

  // Suchen
  for IdxT := 1 to n do
  begin
      pC := 0;
      nC := 0;
      for i := 1 to lact do
      begin
          if p[i] = t[IdxT] then
            nC := pC
          else
            nC := 1 + min3(nC, pC, C[i]);
          pC := C[i];
          C[i] := nC;
      end;
      // nchste letzte aktive Zelle suchen
      while C[lact] > k do dec(lact);

      if lact = m then
      begin
          if result = 0 then result := IdxT;
          if assigned(Trefferlist) then
                    Trefferlist.Add(TApproxTreffer.Create(IdxT, C[lact]));
      end else
        inc(lact);
  end;
end;


// Preprocessing
function PreProcess_NFA(p: AnsiString): TBC_CardinalArray;
var i,m: Integer;
  j: cardinal;
  c: Char;
begin
  for c := low(Char) to High(Char) do
    result[c] := 0;
  //ZeroMemory(@result[Chr(0)],length(result)*SizeOf(Cardinal));
  m := Length(p);
  j := 1;
  for i := 1 to m do
  begin
    result[p[i]] := result[p[i]] or j;
    j := j shl 1;
  end;
end;

function SearchNFA(t, p: AnsiString; eq: Integer; TrefferList: TApproxTrefferList; ErrorMode: Byte ): Integer;
var i, m, n, k, IdxT, kout: Integer;
  oldR, newR, mask: Cardinal;
  B: TBC_CardinalArray;
  R: Array of Cardinal;

begin
  // Vorbereitung
  m := Length(p);
  n := length(t);
  // max. erlaubte Fehlerzahl
  case ErrorMode of
    EM_ABSOLUTE: k := eq;
    EM_RELATIVE: k := (eq * m) div 100
  else
    k := 0;
  end;
  result := 0;

  // Nur Muster mit Lnge <= 32 suchen
  if m > 32 then
    result := -1
  else
  begin
      B := PreProcess_NFA(p);
      mask := 1 shl (m-1);
      Setlength(R, k+1);
      R[0] := 0;
      for i := 1 to k do R[i] := (R[i-1] shl 1) or 1;

      for IdxT := 1 to n do
      begin
        oldR := R[0];
        newR := ((oldR shl 1) or 1) and B[t[IdxT]];

        kout := k + 1;
          if (newR and mask) <> 0 then
            kout := 0;

        R[0] := newR;
        for i := 1 to k do
        begin
            newR :=
                   ((R[i] shl 1) and B[t[IdxT]])
                OR oldR
                OR ((oldR OR newR) shl 1)
                or 1;
            oldR := R[i];
            R[i] := newR;

            // zur Bestimmung des Fehlers bei der Ausgabe ntig
            if ((newR and mask) <> 0) and (i < kout) then
              kout := i;
        end;
        //if (newR and mask) <> 0 then
        if kout < k+1 then
        begin
          if result <> 0 then result := IdxT;
          if assigned(Trefferlist) then
                    Trefferlist.Add(TApproxTreffer.Create(IdxT, kout));
        end;
      end;
  end;
end;


function PreProcess_FilterCount(p: AnsiString): TBC_IntArray;
var c: Char;
    m, i: Integer;
begin
  m := Length(p);
  for c := Low(Char) to High(Char) do
    Result[c] := 0;
  for i := 1 to m do
    result[p[i]] := result[p[i]] + 1;
end;

function SearchFilterCountDP(t, p: AnsiString; eq: Integer; TrefferList: TApproxTrefferList; ErrorMode: Byte ): Integer;
var n, m, k, IdxT, count: Integer;
    A: TBC_IntArray;
    lact, pC, nC, LastStop: Integer;
    C: Array of Integer;

    procedure Check(Start, Ende: Integer);
    var i, IdxTCheck, s: Integer;
    begin

            if Start > LastStop then
            begin
                // Neue Suche initialisieren
                for i := 0 to m do C[i] := i;
                lact := k+1;
                s := start;
            end else
                s := LastStop + 1;
            // (Weiter-)Suchen
            for IdxTCheck := s to Ende do
            begin
                pC := 0;
                nC := 0;
                for i := 1 to lact do
                begin
                    if p[i] = t[IdxTCheck] then
                      nC := pC
                    else
                      nC := 1 + min3(nC, pC, C[i]);
                    pC := C[i];
                    C[i] := nC;
                end;
                // nchste letzte aktive Zelle suchen
                while C[lact] > k do dec(lact);

                if lact = m then
                begin
                    //if result = 0 then result := IdxTCheck;
                    if assigned(Trefferlist) then
                              Trefferlist.Add(TApproxTreffer.Create(IdxTCheck, C[lact]));
                end else
                  inc(lact);
            end;
            LastStop := Ende;

    end;
begin
  n := length(t);
  m := length(p);
  // max. erlaubte Fehlerzahl
  case ErrorMode of
    EM_ABSOLUTE: k := eq;
    EM_RELATIVE: k := (eq * m) div 100
  else
    k := 0;
  end;
  result := 0;

  // Vorbereitung
  A := PreProcess_FilterCount(p);
  setlength(C, m+1); //(Fr die berprfung)
  LastStop := -1;
  Count := 0;

  // Suchphase
  // Initiales Suchfenster fllen
  for IdxT := 1 to m do
  begin
      if A[t[IdxT]] > 0 then inc(Count);
      A[t[IdxT]] := A[t[IdxT]] - 1;
  end;

  //----------------------
  If Count >= m-k then   // berprfung starten
      Check(1, m);
      ;
  //----------------------

  // Rest des Textes durchlaufen
  For IdxT := m+1 to n do
  begin
      if A[t[IdxT-m]] >= 0 then dec(Count);
      A[t[IdxT-m]] := A[t[IdxT-m]] + 1;
      if A[t[IdxT]] > 0 then inc(Count);
      A[t[IdxT]] := A[t[IdxT]] - 1;

      If Count >= m-k then //; // berprfung starten/fortsetzen
        Check(IdxT-m+1, IdxT);
  end;
end;

function SearchFilterCountNFA(t, p: AnsiString; eq: Integer; TrefferList: TApproxTrefferList; ErrorMode: Byte ): Integer;
var n, m, k, IdxT, count: Integer;
    A: TBC_IntArray;

    oldR, newR, mask: Cardinal;
    B: TBC_CardinalArray;
    R: Array of Cardinal;

    LastStop, kout: Integer;



    procedure Check(Start, Ende: Integer);
    var i, IdxTCheck: Integer;
    begin

            if Start > LastStop then
            begin
                // Neue Suche initialisieren
                R[0] := 0;
                for i := 1 to k do R[i] := (R[i-1] shl 1) or 1;
            end else
                start := LastStop + 1;

            // (Weiter-)Suchen
            for IdxTCheck := start to Ende do
            begin
                oldR := R[0];
                newR := ((oldR shl 1) or 1) and B[t[IdxTCheck]];

                kout := k + 1;
                  if (newR and mask) <> 0 then
                    kout := 0;

                R[0] := newR;
                for i := 1 to k do
                begin
                    newR :=
                           ((R[i] shl 1) and B[t[IdxTCheck]])
                        OR oldR
                        OR ((oldR OR newR) shl 1)
                        or 1;
                    oldR := R[i];
                    R[i] := newR;

                    // zur Bestimmung des Fehlers bei der Ausgabe ntig
                    if ((newR and mask) <> 0) and (i < kout) then
                      kout := i;
                end;
                //if (newR and mask) <> 0 then
                if kout < k+1 then
                begin
                  if assigned(Trefferlist) then
                            Trefferlist.Add(TApproxTreffer.Create(IdxTCheck, kout));
                end;
            end;
            LastStop := Ende;

    end;
begin
  n := length(t);
  m := length(p);
  // max. erlaubte Fehlerzahl
  case ErrorMode of
    EM_ABSOLUTE: k := eq;
    EM_RELATIVE: k := (eq * m) div 100
  else
    k := 0;
  end;
  result := 0;

  if m > 32 then
    result := -1
  else
  begin
      // Vorbereitung
      B := PreProcess_NFA(p);
      mask := 1 shl (m-1);
      Setlength(R, k+1);


      A := PreProcess_FilterCount(p);
      LastStop := -1;
      Count := 0;

      // Suchphase
      // Initiales Suchfenster fllen
      for IdxT := 1 to m do
      begin
          if A[t[IdxT]] > 0 then inc(Count);
          A[t[IdxT]] := A[t[IdxT]] - 1;
      end;

      If Count >= m-k then
          Check(1, m);

      // Rest des Textes durchlaufen
      For IdxT := m+1 to n do
      begin
          if A[t[IdxT-m]] >= 0 then dec(Count);
          A[t[IdxT-m]] := A[t[IdxT-m]] + 1;
          if A[t[IdxT]] > 0 then inc(Count);
          A[t[IdxT]] := A[t[IdxT]] - 1;

          If Count >= m-k then  // berprfung starten/fortsetzen
              Check(IdxT-m+1, IdxT);
      end;
  end;
end;


function ApproxSearch(t, p: AnsiString; eq: Integer; alg: ApproxSearchAlgorithms; TrefferList: TApproxTrefferList; ErrorMode: Byte ): Boolean;
var idx: Integer;
begin
  case alg of
      dp      : idx := SearchDP(t, p, eq, TrefferList,ErrorMode);
      nfa     : idx := SearchNFA(t, p, eq, TrefferList,ErrorMode);
      countDP : idx := SearchFilterCountDP(t, p, eq, TrefferList,ErrorMode);
      countNFA: idx := SearchFilterCountNFA(t, p, eq, TrefferList,ErrorMode);
  else
    idx := -1;
  end;

  result := idx > -1;
end;


end.
